library(ggplot2)
library(dplyr)
library(corrplot)
library(readxl)
library(lmtest) 
library(forecast)
library(DIMORA)
library(fpp2)
library(tidyr)
library(lubridate)
number_of_employees <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/number-of-employees.xlsx")
quarterly_net_income <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/quarterly-net-income.xlsx")
research_development_spending <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/research-development-spending.xlsx")
revenue_by_quarter <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/revenue-by-quarter.xlsx")
revenue_by_segment <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/revenue-by-segment.xlsx")
# Rimuovere le prime 4 righe e l'ultima riga
revenue_by_quarter <- revenue_by_quarter %>%
  slice(5:(n() - 1))

research_development_spending <- research_development_spending %>%
  slice(4:n())

number_of_employees <- number_of_employees %>%
  slice(2:n())
# Normalizzare i dati
normalize <- function(x) {
  return((x - mean(x)) / sd(x))
}

# Ripetere i valori annuali per ciascuno dei quattro trimestri
repeat_annual_data <- function(annual_data) {
  return(rep(annual_data, each = 4))
}

employees_repeated <- repeat_annual_data(number_of_employees$employees)
spending_repeated <- repeat_annual_data(research_development_spending$billion)

# Normalizzare i dati annuali ripetuti
n_employees <- normalize(employees_repeated)
n_spending <- normalize(spending_repeated)

n_revenue <- normalize(revenue)
n_net_income <- normalize(net_income)

# Creare un dataframe combinato
combined_data <- data.frame(
  Quarter = tt,
  Revenue = n_revenue,
  Net_Income = n_net_income,
  Employees = n_employees,
  Spending = n_spending
)

# Trasformare il dataframe in formato lungo
combined_data_long <- combined_data %>%
  pivot_longer(cols = c("Revenue", "Net_Income", "Employees", "Spending"), names_to = "Metric", values_to = "Value")

# Creare il grafico
ggplot(data = combined_data_long, aes(x = Quarter, y = Value, color = Metric)) +
  geom_line() +
  labs(x = "Quarter", y = "Value", title = "Quarterly Revenue and Net Income") +
  theme_minimal()

# Sostituisci i valori NA con zero
revenue_by_segment[is.na(revenue_by_segment)] <- 0

# Trasforma i dati in formato lungo per ggplot2
df_long <- revenue_by_segment %>%
  pivot_longer(cols = -year, names_to = "category", values_to = "value")

# Seleziona una palette di colori con abbastanza variazioni
palette_colors <- c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3", "#a6d854", 
                    "#ffd92f", "#e5c494", "#b3b3b3", "#1b9e77", "#d95f02", 
                    "#7570b3", "#e7298a", "#66a61e", "#e6ab02", "#a6761d", "#666666")

# Crea il grafico
ggplot(df_long, aes(x = factor(year), y = value, fill = category)) +
  geom_bar(stat = "identity", position = "stack") +  # Usa "stack" per impilare le barre
  labs(x = "Year", y = "Value", title = "Annual Data by Category") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position = "right",  # Sposta la legenda a destra
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8)) +
  scale_fill_manual(values = palette_colors) +
  guides(fill = guide_legend(ncol = 1, byrow = TRUE))  # Migliora la leggibilità della legenda

# Assicurati che tutte le colonne utilizzate per la correlazione siano numeriche
# Supponiamo che i dati siano nelle colonne 2 delle rispettive tabelle (adatta l'indice di colonna se necessario)
data_combined <- data.frame(
  revenue = revenue_by_quarter$billion,
  net_income = quarterly_net_income$income,
  employees = employees_repeated,
  rnd_spending = spending_repeated
)

# Calcolo della matrice di correlazione
correlation_matrix <- cor(data_combined, use = "complete.obs")

# Visualizzazione della matrice di correlazione
print(correlation_matrix)
               revenue net_income employees rnd_spending
revenue      1.0000000  0.4142986 0.8253796    0.8352008
net_income   0.4142986  1.0000000 0.1724034    0.1767102
employees    0.8253796  0.1724034 1.0000000    0.9728936
rnd_spending 0.8352008  0.1767102 0.9728936    1.0000000
# Se desideri una visualizzazione grafica
library(corrplot)
corrplot(correlation_matrix, method = "circle")


# Creazione delle serie temporali
revenue.ts <- ts(revenue_by_quarter$billion, frequency = 4)
net_income.ts <- ts(quarterly_net_income$income, frequency = 4)
employees_repeated <- ts(employees_repeated, frequency = 4)
spending_repeated <- ts(spending_repeated, frequency = 4)

# Differenziazione delle serie temporali
diff_revenue <- diff(revenue.ts)
diff_net_income <- diff(net_income.ts)
diff_employees <- diff(employees_repeated)
diff_spending <- diff(spending_repeated)

# Creazione del dataframe con le serie differenziate
data_combined_diff <- data.frame(
  diff_revenue = as.numeric(diff_revenue),
  diff_net_income = as.numeric(diff_net_income),
  diff_employees = as.numeric(diff_employees),
  diff_spending = as.numeric(diff_spending)
)

# Calcolo della matrice di correlazione
correlation_matrix_diff <- cor(data_combined_diff, use = "complete.obs")

# Visualizzazione della matrice di correlazione
print(correlation_matrix_diff)
                diff_revenue diff_net_income diff_employees diff_spending
diff_revenue       1.0000000       0.4969756     -0.4245670    -0.4508853
diff_net_income    0.4969756       1.0000000     -0.2592996    -0.2821900
diff_employees    -0.4245670      -0.2592996      1.0000000     0.7179994
diff_spending     -0.4508853      -0.2821900      0.7179994     1.0000000
# Visualizzazione grafica della matrice di correlazione
corrplot(correlation_matrix_diff, method = "circle")

tt <- 1:NROW(revenue_by_quarter)
revenue <- revenue_by_quarter$billion
net_income <- quarterly_net_income$income
revenue_by_quarter$quarter <- as.yearqtr(revenue_by_quarter$quarter, format = "Q%q '%y")

tsdisplay(revenue)


# Calcolo della media mobile (esempio con finestra di 12 mesi per dati mensili)
moving_average <- ma(revenue, order = 12)

ggplot(data = revenue_by_quarter, aes(x = tt)) +
  geom_line(aes(y = billion)) +
  geom_line(aes(y = moving_average), color = "red") +
  labs(x = "Quarter", y = "Billion", title = "Quarter vs. Billion")

NA
NA
NA
# Aggiungi colonne per anno e trimestre
revenue_by_quarter <- revenue_by_quarter %>%
  mutate(
    year = as.integer(format(quarter, "%Y")),
    quarter_num = as.integer(format(quarter, "%q"))
  )

# Crea un grafico stagionale
ggplot(revenue_by_quarter, aes(x = quarter_num, y = billion, group = year, color = factor(year))) +
  geom_line() +
  scale_x_continuous(breaks = 1:4, labels = c("Q1", "Q2", "Q3", "Q4")) +
  labs(x = "Quarter", y = "Revenue (Billion)", title = "Seasonal Plot: Revenue by Quarter", color = "Year") +
  theme_minimal() +
  theme(legend.position = "right")

#Dato che i dati mostrano una tendenza, potrebbe essere necessario differenziare i dati per renderli stazionari prima di applicare un modello ARIMA.

# Differenziazione dei dati
diff_revenue <- diff(revenue)

# Visualizzazione dei dati differenziati
plot(diff_revenue, type = "l", main = "Differenced Revenue")


# Test di stazionarietà (ad esempio, test di Dickey-Fuller aumentato)
library(tseries)
adf.test(diff_revenue)

    Augmented Dickey-Fuller Test

data:  diff_revenue
Dickey-Fuller = -1.6424, Lag order = 3, p-value = 0.7197
alternative hypothesis: stationary
# Creare la serie temporale
revenue_ts <- ts(revenue_by_quarter$billion, start=c(2007, 1), frequency=4)

# Decomporre la serie temporale
decomposed_revenue <- stl(revenue_ts, s.window="periodic")

# Visualizzare la decomposizione
autoplot(decomposed_revenue) +
  labs(title = "Decomposition of Quarterly Revenue", x = "Time")


# Estrarre la componente stagionale
seasonal_component <- decomposed_revenue$time.series[, "seasonal"]

# Creare il grafico della stagionalità
ggplot(data = data.frame(Quarter = time(revenue_ts), Seasonal = seasonal_component), 
       aes(x = Quarter, y = Seasonal)) +
  geom_line(color = "blue") +
  labs(x = "Quarter", y = "Seasonal Component", title = "Seasonal Component of Quarterly Revenue") +
  theme_minimal()

library(forecast)
library(tseries)

# Creare la serie temporale
revenue_ts <- ts(revenue_by_quarter$billion, start=c(2007, 1), frequency=4)

# Differenziare i dati per renderli stazionari
diff_revenue <- diff(revenue_ts, differences=1)

# Calcolare e tracciare l'ACF e la PACF della serie differenziata
acf(diff_revenue, main="ACF of Differenced Revenue")

pacf(diff_revenue, main="PACF of Differenced Revenue")


# Utilizzare auto.arima per trovare il miglior modello ARIMA
best_arima_model <- auto.arima(revenue_ts)
summary(best_arima_model)
Series: revenue_ts 
ARIMA(2,0,2)(0,1,0)[4] with drift 

Coefficients:
         ar1      ar2      ma1     ma2   drift
      1.2299  -0.5551  -0.4580  0.8865  0.1168
s.e.  0.1432   0.1442   0.1082  0.0810  0.0466

sigma^2 = 0.1217:  log likelihood = -21.63
AIC=55.26   AICc=56.84   BIC=67.83

Training set error measures:
                       ME      RMSE      MAE        MPE     MAPE      MASE      ACF1
Training set 0.0003898554 0.3233628 0.226715 0.02214281 2.493414 0.3883215 0.0473361
residuals_arima <- residuals(best_arima_model)

# Visualizzare i residui del modello
checkresiduals(best_arima_model)

    Ljung-Box test

data:  Residuals from ARIMA(2,0,2)(0,1,0)[4] with drift
Q* = 5.5134, df = 4, p-value = 0.2386

Model df: 4.   Total lags used: 8

# Prevedere i prossimi 8 trimestri
forecast_horizon <- 12
forecast_revenue <- forecast(best_arima_model, h = forecast_horizon)

# Visualizzare le previsioni
autoplot(forecast_revenue) +
  labs(title = "Revenue Forecast with ARIMA", x = "Date", y = "Revenue (Billion)") +
  autolayer(fitted(best_arima_model), series = "Fitted", PI = FALSE)
Warning: Ignoring unknown parameters: `PI`

air.model <- Arima(window(revenue_ts,end=2022),order=c(2,0,2),
                   seasonal=list(order=c(0,1,0),period=4),lambda=0)
plot(forecast(air.model,h=12))
lines(revenue_ts)

# Funzione per visualizzare il modello
plot_model <- function(model, data) {
  plot(data, ylab="Billion", xlab="Time")
  lines(fitted(model), col=2)
}

plot_model_forecast <- function(model, data, forecast_horizon) {
  forecasted_values <- forecast(model, h = forecast_horizon)
  plot(forecasted_values, ylab = "Billion", xlab = "Time")
  lines(fitted(model), col = 2)
}

# Modello con trend e stagionalità
fit_lm <- tslm(revenue.ts ~ trend + season)
summary(fit_lm)

Call:
tslm(formula = revenue.ts ~ trend + season)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.6546 -0.6215 -0.1046  0.8287  1.6255 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 5.712785   0.300266  19.026  < 2e-16 ***
trend       0.079991   0.006233  12.834  < 2e-16 ***
season2     0.563759   0.325112   1.734   0.0881 .  
season3     0.584393   0.325291   1.797   0.0775 .  
season4     2.161903   0.325590   6.640  1.1e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9194 on 59 degrees of freedom
Multiple R-squared:  0.7917,    Adjusted R-squared:  0.7776 
F-statistic: 56.07 on 4 and 59 DF,  p-value: < 2.2e-16
# Verifica dei residui
checkresiduals(fit_lm)

    Breusch-Godfrey test for serial correlation of order up to 8

data:  Residuals from Linear regression model
LM test = 51.71, df = 8, p-value = 1.915e-08

# Visualizzazione del modello
plot_model(fit_lm, revenue.ts)


plot_model_forecast(fit_lm, revenue.ts, forecast_horizon = 8)

# Modello Holt-Winters
fit_hw <- hw(revenue.ts, seasonal = "additive")
summary(fit_hw)

Forecast method: Holt-Winters' additive method

Model Information:
Holt-Winters' additive method 

Call:
 hw(y = revenue.ts, seasonal = "additive") 

  Smoothing parameters:
    alpha = 0.6867 
    beta  = 0.3438 
    gamma = 0.3133 

  Initial states:
    l = 5.2684 
    b = -0.0168 
    s = 1.3623 -0.2869 -0.3252 -0.7501

  sigma:  0.3874

     AIC     AICc      BIC 
154.2344 157.5677 173.6643 

Error measures:
                     ME      RMSE       MAE       MPE     MAPE      MASE       ACF1
Training set 0.01813475 0.3623624 0.2708647 0.3072365 3.143009 0.4639418 0.02346945

Forecasts:
# Verifica dei residui
checkresiduals(fit_hw)

    Ljung-Box test

data:  Residuals from Holt-Winters' additive method
Q* = 11.658, df = 8, p-value = 0.1671

Model df: 0.   Total lags used: 8

# Visualizzazione del modello
plot_model(fit_hw, revenue.ts)


plot_model_forecast(fit_hw, revenue.ts, forecast_horizon = 8)

# Modello di regressione lineare con componente quadratica
fit_lm_quad <- tslm(revenue.ts ~ trend + I(trend^2) + season)
summary(fit_lm_quad)

Call:
tslm(formula = revenue.ts ~ trend + I(trend^2) + season)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5486 -0.5636 -0.3143  0.6170  1.7710 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.8565249  0.3699855  13.126  < 2e-16 ***
trend        0.1579417  0.0231887   6.811 6.08e-09 ***
I(trend^2)  -0.0011992  0.0003457  -3.469 0.000992 ***
season2      0.5613607  0.2984077   1.881 0.064969 .  
season3      0.5819949  0.2985721   1.949 0.056106 .  
season4      2.1619026  0.2988452   7.234 1.18e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8439 on 58 degrees of freedom
Multiple R-squared:  0.8275,    Adjusted R-squared:  0.8126 
F-statistic: 55.65 on 5 and 58 DF,  p-value: < 2.2e-16
# Verifica dei residui
checkresiduals(fit_lm_quad)

    Breusch-Godfrey test for serial correlation of order up to 9

data:  Residuals from Linear regression model
LM test = 53.8, df = 9, p-value = 2.06e-08

# Visualizzazione del modello
plot_model(fit_lm_quad, revenue.ts)


plot_model_forecast(fit_lm_quad, revenue.ts, forecast_horizon = 8)

# Modello GAM con specifica dei nodi
fit_gam <- gam(revenue ~ s(time, k = 10) + s(quarter, bs = "cc", k = 4), data = revenue_gam_df)
summary(fit_gam)

Family: gaussian 
Link function: identity 

Formula:
revenue ~ s(time, k = 10) + s(quarter, bs = "cc", k = 4)

Parametric coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   9.1400     0.1087   84.11   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Approximate significance of smooth terms:
             edf Ref.df      F p-value    
s(time)    6.485  7.626 33.139  <2e-16 ***
s(quarter) 1.127  2.000  1.646  0.0625 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

R-sq.(adj) =  0.801   Deviance explained = 82.5%
GCV = 0.87314  Scale est. = 0.75565   n = 64
# Verifica dei residui
residuals_gam <- residuals(fit_gam)
plot(residuals_gam, ylab = "residuals")

dwtest(residuals_gam ~ revenue_gam_df$time)

    Durbin-Watson test

data:  residuals_gam ~ revenue_gam_df$time
DW = 2.5751, p-value = 0.9871
alternative hypothesis: true autocorrelation is greater than 0
# Visualizzazione del modello
plot(revenue.ts, ylab = "Billion", xlab = "Time")
lines(fitted(fit_gam), col = 2)

NA
NA
---
title: "R Notebook"
output: html_notebook
---

```{r}
library(ggplot2)
library(dplyr)
library(corrplot)
library(readxl)
library(lmtest) 
library(forecast)
library(DIMORA)
library(fpp2)
library(tidyr)
library(lubridate)
```

```{r}
number_of_employees <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/number-of-employees.xlsx")
quarterly_net_income <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/quarterly-net-income.xlsx")
research_development_spending <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/research-development-spending.xlsx")
revenue_by_quarter <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/revenue-by-quarter.xlsx")
revenue_by_segment <- read_excel("~/Desktop/Unipd/GitHub/Business-Project/Business Project/revenue-by-segment.xlsx")
```

```{r}
# Rimuovere le prime 4 righe e l'ultima riga
revenue_by_quarter <- revenue_by_quarter %>%
  slice(5:(n() - 1))

research_development_spending <- research_development_spending %>%
  slice(4:n())

number_of_employees <- number_of_employees %>%
  slice(2:n())
```


```{r}
# Normalizzare i dati
normalize <- function(x) {
  return((x - mean(x)) / sd(x))
}

# Ripetere i valori annuali per ciascuno dei quattro trimestri
repeat_annual_data <- function(annual_data) {
  return(rep(annual_data, each = 4))
}

employees_repeated <- repeat_annual_data(number_of_employees$employees)
spending_repeated <- repeat_annual_data(research_development_spending$billion)

# Normalizzare i dati annuali ripetuti
n_employees <- normalize(employees_repeated)
n_spending <- normalize(spending_repeated)

n_revenue <- normalize(revenue)
n_net_income <- normalize(net_income)

# Creare un dataframe combinato
combined_data <- data.frame(
  Quarter = tt,
  Revenue = n_revenue,
  Net_Income = n_net_income,
  Employees = n_employees,
  Spending = n_spending
)

# Trasformare il dataframe in formato lungo
combined_data_long <- combined_data %>%
  pivot_longer(cols = c("Revenue", "Net_Income", "Employees", "Spending"), names_to = "Metric", values_to = "Value")

# Creare il grafico
ggplot(data = combined_data_long, aes(x = Quarter, y = Value, color = Metric)) +
  geom_line() +
  labs(x = "Quarter", y = "Value", title = "Quarterly Revenue and Net Income") +
  theme_minimal()

```

```{r}
# Sostituisci i valori NA con zero
revenue_by_segment[is.na(revenue_by_segment)] <- 0

# Trasforma i dati in formato lungo per ggplot2
df_long <- revenue_by_segment %>%
  pivot_longer(cols = -year, names_to = "category", values_to = "value")

# Seleziona una palette di colori con abbastanza variazioni
palette_colors <- c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3", "#a6d854", 
                    "#ffd92f", "#e5c494", "#b3b3b3", "#1b9e77", "#d95f02", 
                    "#7570b3", "#e7298a", "#66a61e", "#e6ab02", "#a6761d", "#666666")

# Crea il grafico
ggplot(df_long, aes(x = factor(year), y = value, fill = category)) +
  geom_bar(stat = "identity", position = "stack") +  # Usa "stack" per impilare le barre
  labs(x = "Year", y = "Value", title = "Annual Data by Category") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.position = "right",  # Sposta la legenda a destra
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8)) +
  scale_fill_manual(values = palette_colors) +
  guides(fill = guide_legend(ncol = 1, byrow = TRUE))  # Migliora la leggibilità della legenda

```

```{r}
# Assicurati che tutte le colonne utilizzate per la correlazione siano numeriche
# Supponiamo che i dati siano nelle colonne 2 delle rispettive tabelle (adatta l'indice di colonna se necessario)
data_combined <- data.frame(
  revenue = revenue_by_quarter$billion,
  net_income = quarterly_net_income$income,
  employees = employees_repeated,
  rnd_spending = spending_repeated
)

# Calcolo della matrice di correlazione
correlation_matrix <- cor(data_combined, use = "complete.obs")

# Visualizzazione della matrice di correlazione
print(correlation_matrix)

# Se desideri una visualizzazione grafica
library(corrplot)
corrplot(correlation_matrix, method = "circle")

# Creazione delle serie temporali
revenue.ts <- ts(revenue_by_quarter$billion, frequency = 4)
net_income.ts <- ts(quarterly_net_income$income, frequency = 4)
employees_repeated <- ts(employees_repeated, frequency = 4)
spending_repeated <- ts(spending_repeated, frequency = 4)

# Differenziazione delle serie temporali
diff_revenue <- diff(revenue.ts)
diff_net_income <- diff(net_income.ts)
diff_employees <- diff(employees_repeated)
diff_spending <- diff(spending_repeated)

# Creazione del dataframe con le serie differenziate
data_combined_diff <- data.frame(
  diff_revenue = as.numeric(diff_revenue),
  diff_net_income = as.numeric(diff_net_income),
  diff_employees = as.numeric(diff_employees),
  diff_spending = as.numeric(diff_spending)
)

# Calcolo della matrice di correlazione
correlation_matrix_diff <- cor(data_combined_diff, use = "complete.obs")

# Visualizzazione della matrice di correlazione
print(correlation_matrix_diff)

# Visualizzazione grafica della matrice di correlazione
corrplot(correlation_matrix_diff, method = "circle")

```


```{r}
tt <- 1:NROW(revenue_by_quarter)
revenue <- revenue_by_quarter$billion
net_income <- quarterly_net_income$income
revenue_by_quarter$quarter <- as.yearqtr(revenue_by_quarter$quarter, format = "Q%q '%y")

tsdisplay(revenue)

# Calcolo della media mobile (esempio con finestra di 12 mesi per dati mensili)
moving_average <- ma(revenue, order = 12)

ggplot(data = revenue_by_quarter, aes(x = tt)) +
  geom_line(aes(y = billion)) +
  geom_line(aes(y = moving_average), color = "red") +
  labs(x = "Quarter", y = "Billion", title = "Quarter vs. Billion")



```
```{r}
# Aggiungi colonne per anno e trimestre
revenue_by_quarter <- revenue_by_quarter %>%
  mutate(
    year = as.integer(format(quarter, "%Y")),
    quarter_num = as.integer(format(quarter, "%q"))
  )

# Crea un grafico stagionale
ggplot(revenue_by_quarter, aes(x = quarter_num, y = billion, group = year, color = factor(year))) +
  geom_line() +
  scale_x_continuous(breaks = 1:4, labels = c("Q1", "Q2", "Q3", "Q4")) +
  labs(x = "Quarter", y = "Revenue (Billion)", title = "Seasonal Plot: Revenue by Quarter", color = "Year") +
  theme_minimal() +
  theme(legend.position = "right")
```



```{r}
#Dato che i dati mostrano una tendenza, potrebbe essere necessario differenziare i dati per renderli stazionari prima di applicare un modello ARIMA.

#La differenziazione è una tecnica utilizzata nell'analisi delle serie temporali per trasformare una serie non stazionaria in una stazionaria. Una serie temporale è stazionaria quando le sue proprietà statistiche, come la media e la varianza, non cambiano nel tempo. La stazionarietà è un requisito fondamentale per l'applicazione di molti modelli di serie temporali, come l'ARIMA, in quanto permette di assumere che i processi generati dai dati siano invarianti nel tempo.

# Differenziazione dei dati
diff_revenue <- diff(revenue)

# Visualizzazione dei dati differenziati
plot(diff_revenue, type = "l", main = "Differenced Revenue")

# Test di stazionarietà (ad esempio, test di Dickey-Fuller aumentato)
library(tseries)
adf.test(diff_revenue)


```



```{r}
# Creare la serie temporale
revenue_ts <- ts(revenue_by_quarter$billion, start=c(2007, 1), frequency=4)

# Decomporre la serie temporale
decomposed_revenue <- stl(revenue_ts, s.window="periodic")

# Visualizzare la decomposizione
autoplot(decomposed_revenue) +
  labs(title = "Decomposition of Quarterly Revenue", x = "Time")

# Estrarre la componente stagionale
seasonal_component <- decomposed_revenue$time.series[, "seasonal"]

# Creare il grafico della stagionalità
ggplot(data = data.frame(Quarter = time(revenue_ts), Seasonal = seasonal_component), 
       aes(x = Quarter, y = Seasonal)) +
  geom_line(color = "blue") +
  labs(x = "Quarter", y = "Seasonal Component", title = "Seasonal Component of Quarterly Revenue") +
  theme_minimal()

```


```{r}
library(forecast)
library(tseries)

# Creare la serie temporale
revenue_ts <- ts(revenue_by_quarter$billion, start=c(2007, 1), frequency=4)

# Differenziare i dati per renderli stazionari
diff_revenue <- diff(revenue_ts, differences=1)

# Calcolare e tracciare l'ACF e la PACF della serie differenziata
acf(diff_revenue, main="ACF of Differenced Revenue")
pacf(diff_revenue, main="PACF of Differenced Revenue")

# Utilizzare auto.arima per trovare il miglior modello ARIMA
best_arima_model <- auto.arima(revenue_ts)
summary(best_arima_model)

residuals_arima <- residuals(best_arima_model)

# Visualizzare i residui del modello
checkresiduals(best_arima_model)

# Prevedere i prossimi 8 trimestri
forecast_horizon <- 12
forecast_revenue <- forecast(best_arima_model, h = forecast_horizon)

# Visualizzare le previsioni
autoplot(forecast_revenue) +
  labs(title = "Revenue Forecast with ARIMA", x = "Date", y = "Revenue (Billion)") +
  autolayer(fitted(best_arima_model), series = "Fitted", PI = FALSE)

```
```{r}
air.model <- Arima(window(revenue_ts,end=2022),order=c(2,0,2),
                   seasonal=list(order=c(0,1,0),period=4),lambda=0)
plot(forecast(air.model,h=12))
lines(revenue_ts)
```



```{r}
# Funzione per visualizzare il modello
plot_model <- function(model, data) {
  plot(data, ylab="Billion", xlab="Time")
  lines(fitted(model), col=2)
}

plot_model_forecast <- function(model, data, forecast_horizon) {
  forecasted_values <- forecast(model, h = forecast_horizon)
  plot(forecasted_values, ylab = "Billion", xlab = "Time")
  lines(fitted(model), col = 2)
}

# Modello con trend e stagionalità
fit_lm <- tslm(revenue.ts ~ trend + season)
summary(fit_lm)

# Verifica dei residui
checkresiduals(fit_lm)

# Visualizzazione del modello
plot_model(fit_lm, revenue.ts)

plot_model_forecast(fit_lm, revenue.ts, forecast_horizon = 8)

```


```{r}
# Modello Holt-Winters
fit_hw <- hw(revenue.ts, seasonal = "additive")
summary(fit_hw)

# Verifica dei residui
checkresiduals(fit_hw)

# Visualizzazione del modello
plot_model(fit_hw, revenue.ts)

plot_model_forecast(fit_hw, revenue.ts, forecast_horizon = 8)

```
```{r}
# Modello di regressione lineare con componente quadratica
fit_lm_quad <- tslm(revenue.ts ~ trend + I(trend^2) + season)
summary(fit_lm_quad)

# Verifica dei residui
checkresiduals(fit_lm_quad)

# Visualizzazione del modello
plot_model(fit_lm_quad, revenue.ts)

plot_model_forecast(fit_lm_quad, revenue.ts, forecast_horizon = 8)

```
```{r}
# Modello GAM con specifica dei nodi
fit_gam <- gam(revenue ~ s(time, k = 10) + s(quarter, bs = "cc", k = 4), data = revenue_gam_df)
summary(fit_gam)

# Verifica dei residui
residuals_gam <- residuals(fit_gam)
plot(residuals_gam, ylab = "residuals")
dwtest(residuals_gam ~ revenue_gam_df$time)

# Visualizzazione del modello
plot(revenue.ts, ylab = "Billion", xlab = "Time")
lines(fitted(fit_gam), col = 2)


```

